// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Copyright 2022 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Protocol buffers for the Small Molecule Universe Project
//
// TODO(pfr): add link to paper when available

// This is a machine readable file to produce code for reading the binary format
// in many languages. The file also serves as detailed documentation of all
// of the fields provided in the dataset.

// For a description of protocol buffers and the language spec (if you can't
// figure it out from the names), see
// https://developers.google.com/protocol-buffers/

syntax = "proto3";


import "google/protobuf/descriptor.proto";


///////////// Custom options
// The dataset is released in two forms
// 1. COMPLETE: Has all molecules and almost all fields
// 2. STANDARD: Has molecules where calculations did not encounter serious
//    errors and a limited set of the most interesting fields.
//
// We use protobuf field options for marking the availability of fields in
// Properties in the different versions of the dataset. These types define those
// custom options.
// See https://developers.google.com/protocol-buffers/docs/proto#customoptions
//
// For each field, there will be an annotation like [(availability) = COMPLETE]
// indicating which version of the dataset contains this field. Everything in
// STANDARD is also included in COMPLETE.

// Identifies a released version of the dataset. Used as the value of the
// `availability` field option and by Errors.which_database.
// Renaming or renumbering these values would change the schema seen by
// existing readers.
enum AvailabilityEnum {
  // Default (zero) value: no availability has been specified.
  UNSPECIFIED = 0;
  // INTERNAL_ONLY fields are not in any public version of the dataset
  INTERNAL_ONLY = 1;
  // Present in the STANDARD version; everything in STANDARD is also included
  // in COMPLETE.
  STANDARD = 2;
  // Present in the COMPLETE version of the dataset.
  COMPLETE = 3;
}

// Custom field option carrying an AvailabilityEnum value. It is attached to
// fields with the syntax [(availability) = STANDARD].
extend google.protobuf.FieldOptions {
  // Version of the dataset that contains the annotated field.
  optional AvailabilityEnum availability = 336921174;
}

///////////// Primary data

// BondTopology specifies the atom types and connections.
message BondTopology {
  // Type of a single atom. Charged nitrogen and oxygen have their own values.
  enum AtomType {
    // ATOM_UNDEFINED never occurs in final data.
    ATOM_UNDEFINED = 0;
    ATOM_C = 1;
    ATOM_N = 2;
    // NPOS is a positively charged nitrogen
    ATOM_NPOS = 3;
    ATOM_O = 4;
    // ONEG is a negatively charged oxygen
    ATOM_ONEG = 5;
    ATOM_F = 6;
    ATOM_H = 7;
  }

  // Note that there is no "aromatic" bond type.
  enum BondType {
    // BOND_UNDEFINED should never occur in final data
    BOND_UNDEFINED = 0;
    BOND_SINGLE = 1;
    BOND_DOUBLE = 2;
    BOND_TRIPLE = 3;
  }

  // A single bond between two atoms.
  message Bond {
    // The two bonded atoms.
    // NOTE(review): presumably indices into the `atom` list of the enclosing
    // BondTopology; the index base is not stated here -- confirm.
    int32 atom_a = 1;
    int32 atom_b = 2;
    BondType bond_type = 3;
  }

  // These values are for the `info` bitfield below. Every non-zero value is a
  // distinct power of two so that several sources can be combined.
  enum SourceType {
    SOURCE_UNSET = 0;
    // Indicates that this was the topology used during the geometry creation
    // process itself. It will always co-occur with SOURCE_DDT
    SOURCE_STARTING = 1;
    // Indicates that this is a topology that matches the bond ranges
    // derived from the observed distribution of bond lengths for molecules
    // with mostly successful calculations.
    // DDT means Distribution Derived Thresholds
    SOURCE_DDT = 2;
    // Indicates that this topology matches bond lengths determined by covalent
    // radii plus a tolerance factor.
    // Follows the procedure from:
    // Meng, E. C. & Lewis, R. A. Determination of molecular topology and atomic
    // hybridization states from heavy atom coordinates. J. Comput. Chem. 12,
    // 891–898 (1991)
    SOURCE_MLCR = 4;
    // Indicates that this topology matches bond lengths extracted from
    // the Cambridge Structural Database as compiled by:
    // Allen, F. H. et al. Tables of bond lengths determined by X-ray and neutron
    // diffraction. Part 1. Bond lengths in organic compounds.
    // J. Chem. Soc. Perkin Trans. 2 S1–S19 (1987)
    // with a 3 sigma cutoff.
    SOURCE_CSD = 8;
    // This value will never appear in the published database, but if bond
    // lengths are modified for geometry based queries to the database, then this
    // value will be used.
    SOURCE_CUSTOM = 16;
  }

  // Note that this order of atoms will be used throughout the specification of
  // geometries below.
  repeated AtomType atom = 1;

  // The bonds connecting the atoms listed in `atom`.
  repeated Bond bond = 2;
  // The SMILES that are stored are canonical SMILES computed by RDKit. However
  // since we are not using aromatic bonds, there is one case that must be
  // special cased. See smu_utils_lib.py:compute_smiles_for_bond_topology
  string smiles = 3;

  // A unique 1 indexed dense id for this topology.
  int32 topo_id = 4;

  // Bitfield of values from SourceType indicating where this bond topology
  // came from.
  int32 info = 10;

  // An estimate of ln(P(topology|geometry))
  //
  // This is a discrete distribution among all matching topologies
  // from this geometry. Since we are in log space, a value of 0 means
  // a probability of 1.0 and more negative values are smaller
  // probabilities.
  // Note that this score is *only* available for topologies with source
  // including SOURCE_DDT.
  // Across the SOURCE_DDT topologies, exp(topology_score) will sum to 1.
  float topology_score = 8;

  // An estimate of ln(p(geometry|topology)).
  //
  // This reflects how surprising this particular geometry is given the
  // topology. Note that this is a probability *density* not a
  // probability, so the scale can cover positive and negative numbers
  // with smaller numbers representing less likely geometries. This
  // score should *not* be used to compare across different
  // topologies. It is only interesting for a given topology to
  // compare different geometries.
  //
  // Note that this score is *only* available for topologies with source
  // including SOURCE_DDT.
  //
  // Note that only bonds between heavy atoms are used in computing this.
  float geometry_score = 9;

  // INTERNAL ONLY: This will not be available in the external database.
  //
  // If this has been discerned from geometry, the associated score.
  // Now, prefer topology_score and geometry_score to reflect the different
  // kinds of scores available.
  // This is still used in topology_molecule as a generic score value
  // accumulator - topology_molecule does not know what kind of score it is
  // accumulating.
  float score = 5 [(availability) = INTERNAL_ONLY];

  // If this has been discerned from geometry, is it the same as the
  // starting BondTopology.
  // DEPRECATED: will not be set in final database.
  // NOTE(review): the SOURCE_STARTING bit of `info` appears to carry the same
  // information -- confirm that it is the intended replacement.
  bool is_starting_topology = 6 [deprecated = true];

  // Number of atoms that are in a ring.
  // INTERNAL ONLY: This will not be available in the external database.
  int32 ring_atom_count = 7 [(availability) = INTERNAL_ONLY];
}

// When doing topology from geometry matching, the results are returned
// as a set of individual BondTopology's.
message TopologyMatches {
  // The individual topologies returned by the topology from geometry matching.
  repeated BondTopology bond_topology = 1;

  // It is convenient to have the starting smiles.
  string starting_smiles = 2;

  // The mol_id that these matches belong to.
  int32 mol_id = 3;

  // To avoid having to join results later, store the fate.
  Properties.FateCategory fate = 4;
}

// Geometry captures the 3D locations of the atoms. The atom types are left
// out and are matched by atom index to a BondTopology.
message Geometry {
  // Position of a single atom.
  // Units: Bohr
  message AtomPos {
    float x = 1;
    float y = 2;
    float z = 3;
  }
  // Atom positions, matched by atom index to a BondTopology.
  repeated AtomPos atompos = 1
      [(availability) = STANDARD];

  // Energy and overall gradient norm (gnorm) for the geometry.
  // The energy is computed by Turbomole with PBE0-D3/6-311G(d)
  // Units: atomic units (Hartrees)
  Properties.ScalarMolecularProperty energy = 2
      [(availability) = STANDARD];
  Properties.ScalarMolecularProperty gnorm = 3
      [(availability) = STANDARD];

  // Just the nuclear repulsion energy term for the energy.
  // Will only be available for the optimized geometry.
  // Units: atomic units (Hartree)
  Properties.ScalarMolecularProperty enuc = 4
      [(availability) = COMPLETE];

  // The eigenvalues of the moment of inertia tensor.
  // Will only be available for the optimized geometry.
  // Units: MHz
  Properties.MultiScalarMolecularProperty brot = 5
      [(availability) = STANDARD];
}

// All the properties that are computed for a molecule.
message Properties {
  // Fate is a simple categorical summary of how this molecule progressed
  // through the whole pipeline of computations.
  // Note that the numeric values do not follow declaration order (for example
  // FATE_SUCCESS_ALL_WARNING_LOW = 9 is declared last and the FATE_ERROR_*
  // values are 8, 10 and 11). The numbers are what is stored in the data, so
  // they must not be changed to tidy up the ordering.
  enum FateCategory {
    // FATE_UNDEFINED should not be present in the final data
    FATE_UNDEFINED = 0;
    // Duplicated to another Molecule with the same bond topology. This
    // Molecule will not have most properties fields.
    FATE_DUPLICATE_SAME_TOPOLOGY = 1;
    // Duplicated to another Molecule with a different bond topology. This
    // Molecule will not have most properties fields.
    FATE_DUPLICATE_DIFFERENT_TOPOLOGY = 2;
    // Errors were encountered during geometry optimization. This
    // Molecule will not have most properties fields.
    FATE_FAILURE_GEO_OPT = 3;
    // After geometry optimization, the expected bonding pattern was no longer
    // consistent with the geometry. This Molecule will not have most
    // properties fields.
    FATE_FAILURE_TOPOLOGY_CHECK = 4;
    // During geometry optimization, problems were encountered converging to a
    // local minimum. This Molecule will not have most properties fields.
    // TODO(pfr): check documentation
    FATE_FORCE_CONSTANT_FAILURE = 5;
    // Other problems were encountered during geometry optimization. This
    // Molecule will not have most properties fields.
    FATE_FAILURE_STAGE2 = 6;
    // Calculation results are missing. Should not be present in final dataset.
    FATE_FAILURE_NO_RESULTS = 7;
    // Some calculation results had serious/major/moderate errors.
    // (This comment covers the three FATE_ERROR_* values that follow.)
    FATE_ERROR_SERIOUS = 8;
    FATE_ERROR_MAJOR = 10;
    FATE_ERROR_MODERATE = 11;
    // Some calculations had only minor errors with some warnings, with the
    // exception of cation energies.
    // (This comment covers the two values that follow.)
    FATE_SUCCESS_NEUTRAL_WARNING_SERIOUS = 14;
    FATE_SUCCESS_NEUTRAL_WARNING_MEDIUM_VIB = 15;
    // Calculations were successful with only minor errors/warnings, with the
    // exception of cation energies
    FATE_SUCCESS_NEUTRAL_WARNING_LOW = 16;
    // Some calculations had only minor errors with some warnings.
    // (This comment covers the two values that follow.)
    FATE_SUCCESS_ALL_WARNING_SERIOUS = 12;
    FATE_SUCCESS_ALL_WARNING_MEDIUM_VIB = 13;
    // Calculations were successful with only minor errors/warnings
    FATE_SUCCESS_ALL_WARNING_LOW = 9;
  }

  // Errors captures information about how the computation on this molecule
  // progressed.
  message Errors {
    // Note that the field numbers in this message do not follow declaration
    // order. They identify the fields in the binary format and must not be
    // changed.

    // A simple categorical summary of the outcome.
    FateCategory fate = 40;

    // status is a summary of all the error conditions encountered.
    // 0 indicates no errors and larger values indicate increasingly serious
    // errors.
    int32 status = 29;

    // Specifies which database (STANDARD or COMPLETE) this Molecule
    // will be in. Note that everything in STANDARD is also included in COMPLETE.
    AvailabilityEnum which_database = 41;

    // All of the warn_ fields are categories of errors encountered.
    // 0 means no errors and larger values are increasingly serious.

    // T1(2sd) diagnostics 1: > 0.02, 2: > 0.04, 3: > 0.06, 4: > 0.08
    int32 warn_t1 = 30;
    // Excess T1(2sd) diagnostics 1: > 0.015, 2: > 0.03, 3: > 0.05, 4: > 0.07
    int32 warn_delta_t1 = 31;
    // Ratio between
    // a) unsigned difference in B6 and B5 BSEs
    // b) given uncertainty of B6 BSEs
    // 1: > 0.5, 2: > 1, 3: > 1.5, 4: > 2.0
    int32 warn_bse_b6 = 32;
    // Ratio between
    // a) unsigned difference in E_CCSD and B5 BSEs
    // b) given uncertainty of E_CCSD BSEs
    // 1: > 0.5, 2: > 1
    // NOTE(review): the original threshold list ended with a trailing comma;
    // confirm whether levels 3 and 4 exist as they do for warn_bse_b6.
    int32 warn_bse_eccsd = 33;
    // Lowest excitation energy 1: < 1.0, 2: < 0.5, 3: < 0.0, 4: < -0.5 (eV)
    int32 warn_exc_ene = 34;
    // Smallest oscillator strength 1: < -0.001
    // (i.e. negative value, certain to be not due to numerical inaccuracy)
    int32 warn_exc_osmin = 35;
    // Largest unsigned oscillator strength 1: > 1.0, 2: > 2.0
    int32 warn_exc_osmax = 36;
    // Disagreement between geometry analysis and QM program claim about
    // linearity of the molecule.
    // 1: QM claims non-linear, molecule seems to be linear
    // 2: QM claims linear, molecule seems to be non-linear
    // (note: value of 2 apparently not found for SMU1-7)
    // (Warning indicates that computed heats of formation for 298.15 K
    // may be wrong)
    int32 warn_vib_linear = 37;
    // Imaginary frequencies found.
    // 1: At least one imaginary frequency found, all imaginary
    // frequencies are between 0 and 30i cm-1. In this case any
    // imaginary frequency is assumed to be due to numerical
    // imprecision, and we set it to a small real value (1 cm-1) for
    // thermochemical analysis.
    // 2: At least one imaginary frequency found, at least one of
    // which is larger than 30i cm-1. In this case we assume that the
    // geometry really corresponds to a transition state or
    // higher-order saddle point. Imaginary frequencies are skipped in
    // the calculation of ZPEs and enthalpies of formation for 298 K
    // are left out.
    int32 warn_vib_imag = 38;
    // This is simply information about how many of the bonds are
    // classified as those that need an empirical correction in the
    // thermochemical analysis to correct for the occurrence of
    // negatively charged BSR prototype molecules.
    int32 warn_bsr_neg = 39;

    // These four errors (nstat1, nstatc, nstatt, frequencies) are only from
    // the stage1 files and are only useful for internal debugging.
    int32 error_nstat1 = 24;
    int32 error_nstatc = 25;
    int32 error_nstatt = 26;
    // Computation error for frequencies.
    // Original error code: nstatv
    int32 error_frequencies = 2;

    // All errors below here are deprecated and were only used in
    // earlier versions of the dataset.

    // ATOMIC analysis of the molecule.
    // Original error code: nsvato
    int32 error_atomic_analysis = 3;
    // Failed to compute NMR values with B3LYP small basis set (6-31++G**).
    // Original error code: nsvnsb
    int32 error_nmr_analysis_b3lyp_small_basis = 4;
    // Failed to compute NMR values with B3LYP large basis set (aug-pcS-1).
    // Original error code: nsvnlb
    int32 error_nmr_analysis_b3lyp_large_basis = 5;
    // Failed to compute NMR values with PBE0 small basis set (6-31++G**).
    // Original error code: nsvnsp
    int32 error_nmr_analysis_pbe0_small_basis = 6;
    // Failed to compute NMR values with PBE0 large basis set (aug-pcS-1).
    // Original error code: nsvnlp
    int32 error_nmr_analysis_pbe0_large_basis = 7;
    // Charge analysis failed.
    // Original error code: nsvele
    int32 error_charge_analysis = 8;
    // Computation error during energies / orbitals calculations for HF/cc-pVTZ.
    // Original error code: nsveh3
    int32 error_energies_orbitals_pvtz = 9;
    // Computation error during energies / orbitals calculations for HF/cc-pVQZ.
    // Original error code: nsveh4
    int32 error_energies_orbitals_pvqz = 10;
    // Computation error during energies / orbitals calculations for
    // HF/cc-pCVTZ.
    // Original error code: nsvec3
    int32 error_energies_orbitals_pcvtz = 11;
    // Computation error during computing excitation energies.
    // Original error code: nsvexc
    int32 error_excitation_energies = 12;
    // Turbomole single point calculations for PBE0/6-311G* looked faulty.
    // Original error code: nsveca
    int32 error_single_point_energies = 13;
    // Turbomole and MRCC gave different results for molecular energy.
    // Original error code: nsvmr1
    int32 error_inconsistent_molecule_energy_turbomole_mrcc = 14;
    // Turbomole and MRCC gave different results for single point calculations.
    // Original error code: nsvmr2
    int32 error_inconsistent_cation_energy_turbomole_mrcc = 15;
    // Turbomole and ORCA gave different results for molecular energy.
    // Original error code: nsvor1
    int32 error_inconsistent_molecule_energy_turbomole_orca = 16;
    // Turbomole and ORCA gave different results for single point calculations.
    // Original error code: nsvor2
    int32 error_inconsistent_cation_energy_turbomole_orca = 17;
    // Normal mode calculations failed.
    // Original error code: nsvvib
    int32 error_vib_mode = 18;
    // TODO(pfr): Document this with Dirk.
    // Original error code: nsvrot
    int32 error_rotational_modes = 19;
    // TODO(pfr): Document and create better names.
    int32 error_nsvho1 = 20;
    int32 error_nsvho2 = 21;
    int32 error_nsvho3 = 22;
    int32 error_nsvneg = 23;
    int32 error_nsvego = 27;
    int32 error_nsvg09 = 28;

    // TODO(pfr): This is a temporary feature that we will deprecate. It's here
    // so that we could identify the small number of merging failures and figure
    // out what to do with them.
    string error_during_merging = 65;

    // Field numbers for deleted fields. Keeping them reserved prevents the
    // numbers from being reused by a new field.
    reserved 1;
  }

  // First, a number of general message types are defined (*Property) that are
  // used for specific field values below.

  // A single string for the molecule.
  message StringMolecularProperty {
    // The string value.
    string val = 1;
  }

  // A single value for the whole molecule.
  message ScalarMolecularProperty {
    // The value. Units are documented on the fields that use this type.
    double val = 1;
  }
  // Multiple values associated with the whole molecule. Use
  // AtomicMolecularProperty for one value per atom.
  message MultiScalarMolecularProperty {
    // The values. Their number and meaning are documented on the fields that
    // use this type.
    repeated double val = 1;
  }
  // One value for each atom. val and prec should be exactly the same
  // length as atoms in the BondTopology.
  message AtomicMolecularProperty {
    // The per-atom values.
    repeated double val = 1;
    // Note that this is just the precision (prec) with which the value is
    // stored and is NOT an estimate of the confidence/accuracy of the value
    // compared to reality.
    repeated double prec = 2;
  }
  // A vector property.
  message Vector3DMolecularProperty {
    // The x, y and z components of the vector.
    double x = 1;
    double y = 2;
    double z = 3;
  }

  // A rank 2 tensor property.
  message Rank2MolecularProperty {
    // Deprecated flat list of values.
    // NOTE(review): presumably superseded by the named component fields
    // below -- confirm.
    repeated double matrix_values_deprecated = 1 [deprecated = true];
    // Named tensor components. Only six components are stored.
    // NOTE(review): this suggests the tensor is treated as symmetric --
    // confirm.
    double xx = 2;
    double yy = 3;
    double zz = 4;
    double xy = 5;
    double xz = 6;
    double yz = 7;
  }
  // A rank 3 tensor property
  message Rank3MolecularProperty {
    // Deprecated flat list of values.
    // NOTE(review): presumably superseded by the named component fields
    // below -- confirm.
    repeated double tensor_values_deprecated = 1 [deprecated = true];
    // Named tensor components. Only ten components are stored.
    // NOTE(review): this suggests the tensor is treated as fully symmetric --
    // confirm.
    double xxx = 2;
    double yyy = 3;
    double zzz = 4;
    double xxy = 5;
    double xxz = 6;
    double xyy = 7;
    double yyz = 8;
    double xzz = 9;
    double yzz = 10;
    double xyz = 11;
  }
  // A normal mode, given as one displacement per atom.
  message NormalMode {
    // Displacement of a single atom.
    // Units: unitless, vector norm is scaled to 1.
    message AtomicDisplacement {
      double x = 1;
      double y = 2;
      double z = 3;
    }
    // Should be the same length as the number of atoms in BondTopology.
    repeated AtomicDisplacement disp = 1;
  }

  // Record of errors encountered during the computations.
  Errors calc = 1 [(availability) = COMPLETE];

  // The original pipeline used SMILES from OpenBabel which were sometimes
  // unintentionally aromatic SMILES. For simplified lookup in the database
  // we are using RDKit canonical SMILES. If these differ from the original
  // OpenBabel SMILES, then we put the original OpenBabel SMILES here.
  // If they are the same, we leave everything as is.
  optional string smiles_openbabel = 175 [(availability) = COMPLETE];

  // We use suffixes on the properties to indicate the level of theory and, in
  // many cases, the basis set.
  // * hf = Hartree-Fock
  // * b3lyp = DFT with B3LYP functional
  // * pbe0 = DFT with PBE0 functional
  // * pbe0d3 = DFT with PBE0D3 functional
  // * cc2 = simplified coupled cluster doubles
  // * atomic = ATOMIC2/B5,B6

  // 3 * num_atoms.
  // Units: km/mol
  MultiScalarMolecularProperty vib_intens = 4
      [(availability) = STANDARD];

  // Values that are negative are imaginary. Frequencies should be exactly
  // the same length as atoms in the BondTopology.
  // Units: wave numbers, cm^-1
  MultiScalarMolecularProperty vib_freq = 173
      [(availability) = STANDARD];
  // TODO(pfr): document better
  // 3 * num_atoms normal modes with num_atoms x-, y-, and z- displacements
  // each. Each normal mode corresponds to a particular harmonic_intensity.
  repeated NormalMode vib_mode = 174 [(availability) = COMPLETE];

  // Single point energies are all in atomic units (Hartrees).
  // Single point energy PBE0D3/6-311Gd.
  ScalarMolecularProperty spe_std_pbe0d3_6311gd = 6
      [(availability) = COMPLETE];
  // Single point energy PBE0/6-311Gd.
  ScalarMolecularProperty spe_check_pbe0_6311gd_tmol = 7
      [(availability) = COMPLETE];
  // Single point energy PBE0D3/6-311Gd computed with MRCC.
  ScalarMolecularProperty spe_check_pbe0_6311gd_mrcc = 8
      [(availability) = COMPLETE];
  // Single point energy PBE0D3/6-311Gd computed with ORCA.
  ScalarMolecularProperty spe_check_pbe0_6311gd_orca = 9
      [(availability) = COMPLETE];
  // Single point cation energy PBE0/6-311Gd(CAT) computed with Turbomole.
  ScalarMolecularProperty spe_stdcat_pbe0_6311gd_tmol = 10
      [(availability) = COMPLETE];
  // Single point cation energy PBE0/6-311Gd(CAT) computed with MRCC.
  ScalarMolecularProperty spe_stdcat_pbe0_6311gd_mrcc = 11
      [(availability) = COMPLETE];
  // Single point cation energy PBE0/6-311Gd(CAT) computed with ORCA.
  ScalarMolecularProperty spe_stdcat_pbe0_6311gd_orca = 12
      [(availability) = COMPLETE];
  // Single point energy PBE0/aug-pc-1.
  ScalarMolecularProperty spe_std_pbe0_augpc1 = 13
      [(availability) = COMPLETE];
  // Single point energy HF/6-31Gd.
  ScalarMolecularProperty spe_std_hf_631gd = 14
      [(availability) = COMPLETE];
  // Single point energy B3LYP/6-31++Gdp.
  ScalarMolecularProperty spe_std_b3lyp_631ppgdp = 15
      [(availability) = COMPLETE];
  // Single point energy B3LYP/aug-pcS-1.
  ScalarMolecularProperty spe_std_b3lyp_augpcs1 = 16
      [(availability) = COMPLETE];
  // Single point energy PBE0/6-31++Gdp.
  ScalarMolecularProperty spe_std_pbe0_631ppgdp = 17
      [(availability) = COMPLETE];
  // Single point energy PBE0/aug-pcS-1.
  ScalarMolecularProperty spe_std_pbe0_augpcs1 = 18
      [(availability) = COMPLETE];
  // Single point energy HF/TZVP.
  ScalarMolecularProperty spe_std_hf_tzvp = 19
      [(availability) = COMPLETE];
  // Single point energy MP2/TZVP.
  ScalarMolecularProperty spe_std_mp2_tzvp = 20
      [(availability) = COMPLETE];
  // Single point energy CC2/TZVP.
  ScalarMolecularProperty spe_std_cc2_tzvp = 21
      [(availability) = COMPLETE];
  // Single point energy HF/3.
  ScalarMolecularProperty spe_std_hf_3 = 22
      [(availability) = COMPLETE];
  // Single point energy MP2/3.
  ScalarMolecularProperty spe_std_mp2_3 = 23
      [(availability) = COMPLETE];
  // Single point energy HF/4.
  ScalarMolecularProperty spe_std_hf_4 = 24
      [(availability) = COMPLETE];
  // Single point energy MP2/4.
  ScalarMolecularProperty spe_std_mp2_4 = 25
      [(availability) = COMPLETE];
  // Single point energy HF/(34).
  ScalarMolecularProperty spe_std_hf_34 = 26
      [(availability) = COMPLETE];
  // Single point energy MP2/(34).
  ScalarMolecularProperty spe_std_mp2_34 = 27
      [(availability) = COMPLETE];
  // Single point energy HF/CVTZ.
  ScalarMolecularProperty spe_std_hf_cvtz = 28
      [(availability) = COMPLETE];
  // Single point energy MP2full/CVTZ.
  ScalarMolecularProperty spe_std_mp2full_cvtz = 29
      [(availability) = COMPLETE];
  // Single point energy HF/2sp.
  ScalarMolecularProperty spe_std_hf_2sp = 30
      [(availability) = COMPLETE];
  // Single point energy MP2/2sp.
  ScalarMolecularProperty spe_std_mp2_2sp = 31
      [(availability) = COMPLETE];
  // Single point energy CCSD/2sp.
  ScalarMolecularProperty spe_std_ccsd_2sp = 32
      [(availability) = COMPLETE];
  // Single point energy CCSD(T)/2sp.
  ScalarMolecularProperty spe_std_ccsd_t_2sp = 33
      [(availability) = COMPLETE];
  // Single point energy HF/2sd.
  ScalarMolecularProperty spe_std_hf_2sd = 34
      [(availability) = COMPLETE];
  // Single point energy MP2/2sd.
  ScalarMolecularProperty spe_std_mp2_2sd = 35
      [(availability) = COMPLETE];
  // Single point energy CCSD/2sd.
  ScalarMolecularProperty spe_std_ccsd_2sd = 36
      [(availability) = COMPLETE];
  // Single point energy CCSD(T)/2sd.
  ScalarMolecularProperty spe_std_ccsd_t_2sd = 37
      [(availability) = COMPLETE];
  // Single point energy HF/3Psd.
  ScalarMolecularProperty spe_std_hf_3psd = 38
      [(availability) = COMPLETE];
  // Single point energy MP2/3Psd.
  ScalarMolecularProperty spe_std_mp2_3psd = 39
      [(availability) = COMPLETE];
  // Single point energy CCSD/3Psd.
  ScalarMolecularProperty spe_std_ccsd_3psd = 40
      [(availability) = COMPLETE];
  // Single point energy ATOMIC/B5.
  ScalarMolecularProperty spe_comp_b5 = 41
      [(availability) = STANDARD];
  // Single point energy ATOMIC/B6.
  ScalarMolecularProperty spe_comp_b6 = 42
      [(availability) = COMPLETE];
  // Single point energy for E_{CCSD}.
  ScalarMolecularProperty spe_comp_eccsd = 120
      [(availability) = COMPLETE];
  // TODO(pfr, dirk): double check that atomic_b5 is still reported even when
  // other thermochemistry fails.

  // Zero point energy, unscaled.
  // Units: kcal/mol
  ScalarMolecularProperty vib_zpe = 43 [(availability) = COMPLETE];

  // Units: atomic units (Hartrees)
  // HOMO PBE0/6-311Gd.
  ScalarMolecularProperty orb_ehomo_pbe0_6311gd = 47 [(availability) = STANDARD];
  // LUMO PBE0/6-311Gd.
  ScalarMolecularProperty orb_elumo_pbe0_6311gd = 48 [(availability) = STANDARD];
  // HOMO PBE0/aug-pc-1.
  ScalarMolecularProperty orb_ehomo_pbe0_augpc1 = 49 [(availability) = COMPLETE];
  // LUMO PBE0/aug-pc-1.
  ScalarMolecularProperty orb_elumo_pbe0_augpc1 = 50 [(availability) = COMPLETE];
  // HOMO PBE0/6-31++Gdp.
  ScalarMolecularProperty orb_ehomo_pbe0_631ppgdp = 51 [(availability) = COMPLETE];
  // LUMO PBE0/6-31++Gdp.
  ScalarMolecularProperty orb_elumo_pbe0_631ppgdp = 52 [(availability) = COMPLETE];
  // HOMO PBE0/aug-pcS-1.
  ScalarMolecularProperty orb_ehomo_pbe0_augpcs1 = 53 [(availability) = COMPLETE];
  // LUMO PBE0/aug-pcS-1.
  ScalarMolecularProperty orb_elumo_pbe0_augpcs1 = 54 [(availability) = COMPLETE];
  // HOMO B3LYP/6-31++Gdp.
  ScalarMolecularProperty orb_ehomo_b3lyp_631ppgdp = 55 [(availability) = COMPLETE];
  // LUMO B3LYP/6-31++Gdp.
  ScalarMolecularProperty orb_elumo_b3lyp_631ppgdp = 56 [(availability) = COMPLETE];
  // HOMO B3LYP/aug-pcS-1.
  ScalarMolecularProperty orb_ehomo_b3lyp_augpcs1 = 57 [(availability) = COMPLETE];
  // LUMO B3LYP/aug-pcS-1.
  ScalarMolecularProperty orb_elumo_b3lyp_augpcs1 = 58 [(availability) = COMPLETE];
  // HOMO HF/6-31Gd.
  ScalarMolecularProperty orb_ehomo_hf_631gd = 59 [(availability) = COMPLETE];
  // LUMO HF/6-31Gd.
  ScalarMolecularProperty orb_elumo_hf_631gd = 60 [(availability) = COMPLETE];
  // HOMO HF/TZVP.
  ScalarMolecularProperty orb_ehomo_hf_tzvp = 61 [(availability) = COMPLETE];
  // LUMO HF/TZVP.
  ScalarMolecularProperty orb_elumo_hf_tzvp = 62 [(availability) = COMPLETE];
  // HOMO HF/3.
  ScalarMolecularProperty orb_ehomo_hf_3 = 63 [(availability) = COMPLETE];
  // LUMO HF/3.
  ScalarMolecularProperty orb_elumo_hf_3 = 64 [(availability) = COMPLETE];
  // HOMO HF/4.
  ScalarMolecularProperty orb_ehomo_hf_4 = 65 [(availability) = COMPLETE];
  // LUMO HF/4.
  ScalarMolecularProperty orb_elumo_hf_4 = 66 [(availability) = COMPLETE];
  // HOMO HF/CVTZ.
  ScalarMolecularProperty orb_ehomo_hf_cvtz = 67 [(availability) = COMPLETE];
  // LUMO HF/CVTZ.
  ScalarMolecularProperty orb_elumo_hf_cvtz = 68 [(availability) = COMPLETE];

  // The 5 smallest excitation energies and oscillator strengths.
  // Units: atomic units
  MultiScalarMolecularProperty exc_ene_cc2_tzvp = 69
      [(availability) = STANDARD];
  MultiScalarMolecularProperty exc_os_cc2_tzvp = 70
      [(availability) = STANDARD];

  // NMR isotropic shielding are in ppm
  // NMR isotropic shielding PBE0/6-31++Gdp.
  AtomicMolecularProperty nmr_pbe0_631ppgdp = 71
      [(availability) = COMPLETE];
  // NMR isotropic shielding PBE0/aug-pcS-1.
  AtomicMolecularProperty nmr_pbe0_augpcs1 = 72
      [(availability) = STANDARD];
  // NMR isotropic shielding B3LYP/6-31++Gdp.
  AtomicMolecularProperty nmr_b3lyp_631ppgdp = 73
      [(availability) = COMPLETE];
  // NMR isotropic shielding B3LYP/aug-pcS-1.
  AtomicMolecularProperty nmr_b3lyp_augpcs1 = 74
      [(availability) = COMPLETE];

  // All partial charges are in electron units
  // Partial charges MUL-PBE0/aug-pc-1.
  AtomicMolecularProperty chg_mul_pbe0_augpc1 = 75
      [(availability) = STANDARD];
  // Partial charges MUL-HF/6-31Gd.
  AtomicMolecularProperty chg_mul_hf_631gd = 76
      [(availability) = COMPLETE];
  // Partial charges LOE-PBE0/aug-pc-1.
  AtomicMolecularProperty chg_loe_pbe0_augpc1 = 77
      [(availability) = STANDARD];
  // Partial charges LOE-HF/6-31Gd.
  AtomicMolecularProperty chg_loe_hf_631gd = 78
      [(availability) = COMPLETE];
  // Partial charges NAT-PBE0/aug-pc-1.
  AtomicMolecularProperty chg_nat_pbe0_augpc1 = 79
      [(availability) = STANDARD];
  // Partial charges NAT-HF/6-31Gd.
  AtomicMolecularProperty chg_nat_hf_631gd = 80
      [(availability) = COMPLETE];
  // Partial charges PON-PBE0/aug-pc-1.
  AtomicMolecularProperty partial_charges_paboon_pbe0_aug_pc_1 = 81
      [(availability) = INTERNAL_ONLY];
  // Partial charges PON-HF/6-31Gd.
  AtomicMolecularProperty partial_charges_paboon_hf_6_31gd = 82
      [(availability) = INTERNAL_ONLY];
  // Partial charges ESP-PBE0/aug-pc-1.
  AtomicMolecularProperty chg_esp_pbe0_augpc1 = 83
      [(availability) = STANDARD];
  // Partial charges ESP-HF/6-31Gd.
  AtomicMolecularProperty chg_esp_hf_631gd = 84
      [(availability) = COMPLETE];

  // Dipole-dipole polarizability PBE0/aug-pc-1.
  // Units: atomic units
  Rank2MolecularProperty elec_pol_pbe0_augpc1 = 85
      [(availability) = STANDARD];
  // Dipole-dipole polarizability HF/6-31Gd.
  // Units: atomic units
  // DEPRECATED
  Rank2MolecularProperty dipole_dipole_polarizability_hf_6_31gd = 86
      [(availability) = INTERNAL_ONLY];

  // Dipole moment PBE0/aug-pc-1.
  // Units: Debye
  Vector3DMolecularProperty elec_dip_pbe0_augpc1 = 87
      [(availability) = STANDARD];
  // Dipole moment HF/6-31Gd.
  // Units: Debye
  Vector3DMolecularProperty elec_dip_hf_631gd = 88
      [(availability) = COMPLETE];

  // Units: atomic units (bohr^2 * electron)
  // Quadrupole moment PBE0/aug-pc-1.
  Rank2MolecularProperty elec_qua_pbe0_augpc1 = 89
      [(availability) = STANDARD];
  // Quadrupole moment HF/6-31Gd.
  Rank2MolecularProperty elec_qua_hf_631gd = 90
      [(availability) = COMPLETE];

  // Units: atomic units (bohr^3 * electron)
  // Octopole moment PBE0/aug-pc-1.
  Rank3MolecularProperty elec_oct_pbe0_augpc1 = 91
      [(availability) = STANDARD];
  // Octopole moment HF/6-31Gd.
  Rank3MolecularProperty elec_oct_hf_631gd = 92
      [(availability) = COMPLETE];

  // INTERNAL_ONLY
  // Information about the compute cluster that ran this calculation.
  optional string compute_cluster_info = 93 [(availability) = INTERNAL_ONLY];

  optional bool symmetry_used_in_calculation = 94
      [(availability) = INTERNAL_ONLY];

  // Energies and overall gradient norm at the end of the initial geometry
  // optimization and at the end of the final geometry optimization.
  // Units: atomic units (Hartrees)
  // TODO(pfr): should match spe_std_pbe0d3_6311gd
  ScalarMolecularProperty initial_geometry_energy_deprecated = 95
      [(availability) = STANDARD];
  ScalarMolecularProperty initial_geometry_gradient_norm_deprecated = 96
      [(availability) = STANDARD];
  // The energy is computed by Turbomole with PBE0-D3/6-311G(d)
  // Units: atomic units (Hartrees)
  ScalarMolecularProperty optimized_geometry_energy_deprecated = 97
      [(availability) = STANDARD];
  ScalarMolecularProperty optimized_geometry_gradient_norm_deprecated = 98
      [(availability) = STANDARD];

  // The eigenvalues of the moment of inertia tensor.
  // Units: MHz
  Vector3DMolecularProperty rotational_constants_deprecated = 99
      [(availability) = STANDARD];

  // Strings describing the bond separation reaction used by ATOMIC2 analysis.
  // TODO(pfr): example would be helpful.
  StringMolecularProperty at2_gen_bsr_left = 122
      [(availability) = STANDARD];
  StringMolecularProperty at2_gen_bsr_right = 123
      [(availability) = STANDARD];

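  // Notes on field naming below
  // * "atomic_bX" refers to the ATOMIC-2 protocol with the bX model
  // * "atomic_bX_um" refers to the ATOMIC-2um protocol with the bX model (a
  //   bias corrected version of "atomic")
  // * "eccsd" refers to E_{CCSD}
  // * "eccsd_um" refers to E_{CCSD} bias corrected in the same way as the
  //   ATOMIC-2 protocol.
  // * _ci refers to the one sided length of an approximate 95% confidence
  //   interval.
  // NOTE(review): the fields below are actually named with the prefixes
  // "at2_std_" / "at2_um_" and the suffix "_unc", so the names used in these
  // notes look like an older naming scheme. Presumably "at2_std_bX" is
  // "atomic_bX", "at2_um_bX" is "atomic_bX_um" and "_unc" is "_ci" -- confirm
  // and update these notes.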

  // Bond separation energies, in kcal/mol.
  ScalarMolecularProperty at2_std_b5_ereac = 124
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_ereac = 125
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_ereac_unc = 126
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_std_b6_ereac = 127
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_ereac = 128
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_ereac_unc = 129
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_std_eccsd_ereac = 130
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_ereac = 131
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_ereac_unc = 132
      [(availability) = COMPLETE];

  // ZPE-exclusive atomization energy in kcal/mol.
  ScalarMolecularProperty at2_std_b5_eae = 133
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_eae = 134
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_eae_unc = 135
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_std_b6_eae = 136
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_eae = 137
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_eae_unc = 138
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_std_eccsd_eae = 139
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_eae = 140
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_eae_unc = 141
      [(availability) = COMPLETE];

  // ZPE-inclusive atomization energy in kcal/mol.
  ScalarMolecularProperty at2_std_b5_ea0 = 142
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_ea0 = 143
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_ea0_unc = 144
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_std_b6_ea0 = 145
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_ea0 = 146
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_ea0_unc = 147
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_std_eccsd_ea0 = 148
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_ea0 = 149
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_ea0_unc = 150
      [(availability) = COMPLETE];

  // Enthalpy of formation for T = 0K
  // Units: kcal/mol
  ScalarMolecularProperty at2_std_b5_hf0 = 151
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_hf0 = 152
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_hf0_unc = 153
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_std_b6_hf0 = 154
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_hf0 = 155
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_hf0_unc = 156
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_std_eccsd_hf0 = 157
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_hf0 = 158
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_hf0_unc = 159
      [(availability) = COMPLETE];

  // Enthalpy of formation for T = 298.15K.
  // Units: kcal/mol
  ScalarMolecularProperty at2_std_b5_hf298 = 160
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_hf298 = 161
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_b5_hf298_unc = 162
      [(availability) = STANDARD];
  ScalarMolecularProperty at2_std_b6_hf298 = 163
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_hf298 = 164
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_b6_hf298_unc = 165
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_std_eccsd_hf298 = 166
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_hf298 = 167
      [(availability) = COMPLETE];
  ScalarMolecularProperty at2_um_eccsd_hf298_unc = 168
      [(availability) = COMPLETE];

  // Zero point energy, scaled.
  // Units: kcal/mol
  ScalarMolecularProperty at2_std_zpe = 169 [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_zpe = 170 [(availability) = STANDARD];
  ScalarMolecularProperty at2_um_zpe_unc = 171 [(availability) = STANDARD];

  // Number of imaginary frequencies.
  optional int32 number_imaginary_frequencies = 110
      [(availability) = INTERNAL_ONLY];
  // Number of times the optimization was run.
  optional int32 number_of_optimization_runs = 111
      [(availability) = INTERNAL_ONLY];

  // For the optimized geometry, energy just for the nuclei.
  // Units: atomic units (Hartree)
  ScalarMolecularProperty nuclear_repulsion_energy_deprecated = 112
      [(availability) = COMPLETE];

  // Diagnostics D1(CCSD/2sp).
  ScalarMolecularProperty wf_diag_d1_2sp = 113
      [(availability) = COMPLETE];
  // DEPRECATED
  // Diagnostics D1(CCSD/2sd).
  ScalarMolecularProperty diagnostics_d1_ccsd_2sd = 114
      [(availability) = INTERNAL_ONLY];
  // DEPRECATED
  // Diagnostics D1(CCSD/3Psd).
  ScalarMolecularProperty diagnostics_d1_ccsd_3psd = 115
      [(availability) = INTERNAL_ONLY];
  // Diagnostics T1(CCSD/2sp).
  ScalarMolecularProperty wf_diag_t1_2sp = 116
      [(availability) = COMPLETE];
  // Diagnostics T1(CCSD/2sd).
  ScalarMolecularProperty wf_diag_t1_2sd = 117
      [(availability) = STANDARD];
  // Diagnostics T1(CCSD/3Psd).
  ScalarMolecularProperty wf_diag_t1_3psd = 118
      [(availability) = COMPLETE];
  // Diagnostics T1(CCSD/2sp) excess.
  ScalarMolecularProperty at2_gen_t1_exc = 121
      [(availability) = STANDARD];

  // INTERNAL_ONLY
  // The calculation statistics only provide overall runtime per calculation,
  // but no context on what caused a certain runtime.
  // Repeated because we may have multiple software runs.
  message CalculationStatistics {
    // Where the calculation was run.
    // NOTE(review): the format of this string (e.g. cluster or host name) is
    // not documented here -- confirm against the writer.
    string computing_location = 1;
    // Overall runtime of the calculation, stored as text.
    // Units are 0.1 minutes.
    // NOTE(review): the layout of this string (a single value or several) is
    // not documented here -- confirm against the writer.
    string timings = 2;
  }
  repeated CalculationStatistics calculation_statistics = 119
      [(availability) = INTERNAL_ONLY];

  // A set of comparisons between values computed in other fields and the same
  // quantities computed with Gaussian, kept as a sanity check.
  // NOTE(review): units, sign conventions and whether the differences are
  // absolute values are not documented here -- confirm before relying on them.
  message GaussianSanityCheck {
    // Difference in the PBE0/6-311Gd energy.
    float energy_pbe0_6_311gd_diff = 1;
    // Maximum force (presumably from the Gaussian run -- TODO confirm).
    float max_force = 2;
    // Largest and mean difference over the frequencies.
    float max_frequencies_diff = 3;
    float mean_frequencies_diff = 4;
    // Largest and mean difference over the intensities.
    float max_intensities_diff = 5;
    float mean_intensities_diff = 6;
    // Difference in the HF/6-31Gd energy.
    float energy_hf_6_31gd_diff = 7;
    // Largest difference over the components of the dipole, quadrupole and
    // octopole moments respectively.
    // The field name "max_quadropole_components_diff" keeps its historical
    // misspelling: renaming a field changes generated accessors and the
    // JSON/text-format names, so it must not be "fixed" casually.
    float max_dipole_components_diff = 8;
    float max_quadropole_components_diff = 9;
    float max_octopole_components_diff = 10;
  }
  GaussianSanityCheck gaussian_sanity_check = 172
      [(availability) = INTERNAL_ONLY];

  // Field numbers for deleted fields.
  reserved 2, 3, 5, 100 to 109, 44 to 46;
}

// Molecule is the primary protocol buffer for the dataset.
//
// A Molecule represents one particular optimized geometry. That geometry may
// derive from several different initial geometries and may be associated with
// several BondTopology messages.
message Molecule {
  // The molecule ID is unique across all molecules. It is constructed from
  // the topo_id of one of the bond topologies that generated this
  // geometry plus a unique id (<1000): topo_id * 1000 + unique id
  int32 mol_id = 1;

  // INTERNAL_ONLY In the original .dat format, we kept the 1-based dense index
  // of this molecule for the bond_topology. The index is stored here so that
  // the .dat file can be regenerated exactly.
  int32 original_molecule_index = 2 [(availability) = INTERNAL_ONLY];

  // Geometries produced by minimization with an empirical force field (the
  // "initial geometries"). This is repeated because Molecules are
  // deduplicated by their opt_geo and multiple ini_geo entries can converge
  // to the same opt_geo. Although duplicates can be found across bond
  // topologies, *only* the duplicates produced with two different geometry
  // optimizations starting from the same bond topology are included. This is
  // to avoid the complex and potentially ambiguous matching of corresponding
  // atoms across bond topologies.
  repeated Geometry ini_geo = 3;
  // Geometry produced by PBE0D3 minimization from an initial geometry
  // (ini_geo). Some entries in the COMPLETE dataset will be missing opt_geo
  // if the geometry did not converge. All entries in the STANDARD dataset
  // will include opt_geo.
  Geometry opt_geo = 4;

  // After geometry optimization, duplicate geometries were identified and one
  // of the geometries was kept through a series of heuristics.
  // If this Molecule was discarded:
  // * It will only be present in the COMPLETE dataset, not the STANDARD.
  // * duplicate_of is set to the mol_id of the Molecule that this one was
  //   discarded in favor of.
  // If this Molecule was kept over other duplicates:
  // * duplicate_found lists the mol_id of every discarded Molecule.
  // Note: duplicate_of is a proto3 scalar without `optional`, so an unset
  // value reads back as 0.
  int32 duplicate_of = 5;
  repeated int32 duplicate_found = 6;

  // All of the calculated properties.
  Properties prop = 7;

  // All of the bond topologies that describe this geometry.
  // These are ordered by how well the bond topology describes this geometry.
  // TODO(pfr): double check the field name.
  // TODO(pfr): add a reference to the paper describing this
  repeated BondTopology bond_topo = 8;

  // DEPRECATED: old location for this value.
  // NOTE(review): the new location is presumably a field of type
  // Properties.FateCategory inside Properties -- confirm.
  Properties.FateCategory fate_deprecated = 9;

  // Specifies which database (STANDARD or COMPLETE) this Molecule
  // will be in. Note that everything in STANDARD is also included in COMPLETE.
  // NOTE(review): the "_deprecated" suffix suggests this field has been
  // superseded, but no replacement is named here -- confirm.
  AvailabilityEnum which_database_deprecated = 10;
}

///////////// MultipleMolecules

// Bundles multiple Molecule entries into a single message. This message is
// only needed to simplify testing; it will not be present in the main
// dataset.
message MultipleMolecules {
  // The bundled molecules.
  repeated Molecule molecules = 1;
}

///////////// BondTopologySummary

// BondTopologySummary is a summarized form with a very small amount of
// information about each bond topology that was considered. Note the close
// alignment to the "fate" value of a Molecule (Molecule.fate_deprecated is
// documented as the old location of that value).
//
// Field numbers in this message do not follow declaration order (see 12, 13
// and 14 below). They are wire identifiers and must not be renumbered.
message BondTopologySummary {
  // The bond topology being summarized.
  BondTopology bond_topology = 1;

  // Attempted molecules are ones that were attempted to be minimized with
  // PBE0D3. These resulted from potentially many attempts to create 3D
  // coordinates and minimize with MMFF.
  int32 count_attempted_molecules = 2;

  // Next set of fields track the result of each attempted molecule. The sum of
  // count_duplicates_same_topology
  // count_duplicates_different_topology
  // count_failed_geometry_optimization
  // count_kept_geometry (the non-duplicate kept geometries)
  // should be equal to count_attempted_molecules
  // These fields count duplicates after the PBE0D3 geometry minimization.
  int32 count_duplicates_same_topology = 3;
  int32 count_duplicates_different_topology = 4;
  // Failed geometry optimization is any of a number of failures during the
  // initial PBE0D3 optimization step.
  int32 count_failed_geometry_optimization = 5;
  // Geometries which should proceed to next level of calculations.
  int32 count_kept_geometry = 6;

  // Next set of fields tracks the results of the calculations after geometry
  // optimization. The sum of
  // count_missing_calculation
  // count_calculation_with_error
  // count_calculation_with_warning
  // count_calculation_success
  // should be equal to count_kept_geometry.
  // Missing calculations are ones for which we should have results, but are
  // missing them. count_missing_calculation should be 0 in the final dataset.
  int32 count_missing_calculation = 7;
  int32 count_calculation_with_error = 8;
  int32 count_calculation_with_warning = 12;
  int32 count_calculation_success = 9;

  // After all calculations were done, we detected all bond topologies
  // which are consistent with a molecule's geometry, with several
  // different criteria (DDT, MLCR, CSD (see Source above for
  // details)). These fields count the results of these "detected"
  // geometries. Note that the geometries counted in the "duplicate"
  // fields above will probably be counted here.
  // NOTE(review): the field names below use the infixes "itc", "mlcr" and
  // "csd". "DDT" has no matching field name and "itc" is not listed in this
  // comment -- presumably they refer to the same criterion; confirm.
  int32 count_detected_match_itc_with_error = 10;
  int32 count_detected_match_itc_with_warning = 13;
  int32 count_detected_match_itc_success = 11;

  int32 count_detected_match_mlcr_with_error = 15;
  int32 count_detected_match_mlcr_with_warning = 16;
  int32 count_detected_match_mlcr_success = 17;

  int32 count_detected_match_csd_with_error = 18;
  int32 count_detected_match_csd_with_warning = 19;
  int32 count_detected_match_csd_success = 20;

  // The same topology can be detected multiple times for a single molecule.
  // Typically this would be for kekulized forms that result in an isomorphic
  // graph. When this happens, the other fields count the bond topology id
  // only once for that molecule, and this field is also incremented once for
  // that molecule.
  int32 count_multiple_detections = 14;
}

